Setting up the workspace and loading data

# Packages
# ==============================================================================
library(tidyr)
library(ggplot2)
library(knitr)
library(DT)
library(stringr)
library(xtable)
library(hrbrthemes)
library(dplyr)
library(forcats)
library(finalfit)
library(dplyr)
library(stringr)
library(janitor)
library(lme4)
library(extrafont)
library(cowplot)
library(knitr)
library(parameters)
library(here)

# Load fonts
extrafont::loadfonts(quiet = TRUE)


# Load data
# ==============================================================================
bats_res = readRDS("data/Ethiopia_bat-info_test-summary.RDS")
bats_res$SiteName = trimws(gsub("-\\<Bat\\>|\\<Bat\\>", "", bats_res$SiteName))
bats_res$ConcurrentSamplingSite = bats_res$SiteName

tests = readRDS("data/Ethiopia_bat-test-info.RDS")
tests$SiteName = trimws(gsub("-\\<Bat\\>|\\<Bat\\>", "", tests$SiteName))
tests$Interface = ifelse(tests$ScientificName %in% "Rhinopoma hardwickii",
                      "Cave", "Building")

events = readRDS("data/Ethiopia_bat-sampling-events.RDS")

df = bats_res
df$Family = str_to_title(df$Family)
df$AgeClass = word(df$AgeClass, 1)

df$Interface = ifelse(df$ScientificName %in% "Rhinopoma hardwickii",
                      "Cave", "Building")

date_order = unique(format(as.Date(sort(df$EventDate), format = "%Y-%m-%d"),
                        "%Y-%B"))
df$EventMY = format(as.Date(df$EventDate, format = "%Y-%m-%d"), "%Y-%B")
df$EventMY = factor(df$EventMY, levels = date_order, ordered = TRUE)

tests = left_join(tests, df[ , c("AnimalID", "Family")], by = "AnimalID")



Sampling events

df %>%
    group_by(SiteName, EventDate, SeasonModelled) %>%
    tally(name = "No. of bats sampled") %>%
    group_by(EventDate) %>%
    arrange(SiteName, EventDate) %>%
    adorn_totals("row") %>%
    datatable(options = list(dom = 't'), rownames = FALSE)



Viral families tested

df %>%
    group_by(tests_done) %>%
    tally() %>%
    datatable(options = list(dom = 't'), rownames = FALSE)



Species of bats tested

df %>%
    select(Family, ScientificName) %>%
    group_by(Family, ScientificName) %>%
    tally() %>%
    adorn_totals("row") %>%
    datatable(options = list(dom = 't'), rownames = FALSE)



Number of bats testing positive

Number of bats tested = 589

Number of bats testing positive = 99

Percent bats testing positive = 16.8081494



Figure 1. Sampling sites

Made with ArcGIS 10.1

out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-1*png"),
                      full.names = TRUE)
include_graphics(myimages)



Table 1. Summary of bats sampled and viral testing results

df_ani =

    tests %>%
    select(SiteName, Interface, Family, ScientificName, AnimalID, VirusGroup) %>%
    group_by(SiteName, Interface, Family, ScientificName) %>%
    summarize(total_bats = length(unique(AnimalID))) %>%
    as.data.frame() %>%
    adorn_totals("row")



df_pos_ani =

    tests %>%
    select(SiteName, Family, ScientificName, AnimalID, VirusGroup) %>%
    filter( !is.na(VirusGroup)) %>%
    group_by(SiteName, Family, ScientificName) %>%
    summarize(pos_bats = length(unique(AnimalID))) %>%
    as.data.frame() %>%
    adorn_totals("row")


df_ani$pos_bats = paste0(
                      round(df_pos_ani$pos_bats/df_ani$total_bats * 100),
                      "% (", df_pos_ani$pos_bats, ")")
                      

df_spmn = 

    tests %>%
    select(SiteName, ScientificName, SpecimenType, SpecimenID) %>%
    mutate(SpecimenType = paste0("n_",
                                 gsub(" ", "_", SpecimenType))) %>%
    group_by(SiteName, ScientificName, SpecimenType) %>%
    summarize(total = length(unique(SpecimenID))) %>%
    as.data.frame() %>%
    spread(SpecimenType, total) %>%
    adorn_totals("row") %>%
    select(-SiteName)

df_pos_spmn =

    tests %>%
    select(SiteName, ScientificName, SpecimenType, SpecimenID, VirusGroup) %>%
    filter( !is.na(VirusGroup)) %>%
    mutate(SpecimenType = gsub(" ", "_", SpecimenType)) %>%
    group_by(SiteName, ScientificName, SpecimenType) %>%
    summarize(pos = length(unique(SpecimenID))) %>%
    as.data.frame() %>%
    spread(SpecimenType, pos) %>%
    mutate_all(~replace_na(., 0)) %>%
    adorn_totals("row") %>%
    select( -SiteName)

df_pos_spmn$oral_swab = paste0(
                            round(df_pos_spmn$oral_swab/df_spmn$n_oral_swab * 100),
                            "% (", df_pos_spmn$oral_swab, ")")

df_pos_spmn$rectal_swab = paste0(
                              round(df_pos_spmn$rectal_swab/df_spmn$n_rectal_swab * 100),
                              "% (", df_pos_spmn$rectal_swab, ")")


pos_viruses =

    df %>%
    select(ScientificName, pos_virus) %>%
    filter( !is.na(pos_virus)) %>%
    separate_rows(pos_virus, sep = "; ") %>%
    group_by(ScientificName, pos_virus) %>%
    tally() %>%
    mutate(viruses = paste0(pos_virus, " (", n, ")")) %>%
    group_by(ScientificName) %>%
    summarize(
        Viruses = paste0(sort(unique(viruses)), collapse = "<br/><br/>"))


bat_virus_info = left_join(df_ani, df_pos_spmn, by = "ScientificName")
bat_virus_info = left_join(bat_virus_info, pos_viruses, by = "ScientificName")

datatable(bat_virus_info, escape = FALSE, rownames = FALSE)



Figure 2. Summary by sampling month, season, bat species, and sampling interface/location


Figure subcomponent 1: positives by sampling month and season

p_df = df %>%
    group_by(EventMY, is_positive) %>%
    arrange(EventMY) %>%
    summarize(n = n()) %>%
    mutate(is_positive = ifelse(is_positive, "Virus Positive", "Virus Negative")) %>%
    mutate(is_positive = factor(is_positive, levels = c("Virus Positive", "Virus Negative")))

levels(p_df$EventMY) = gsub("-", "\n", levels(p_df$EventMY))

event_plot = ggplot(p_df, aes(x = EventMY, y = n, fill = is_positive, label = n)) +
    geom_bar(stat="identity", width = 0.5) +
    scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "Test result") +
    theme_ipsum(base_size = 12) +
    ylab("Number of bats") +
    xlab("") +
    theme(
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid = element_blank(),
        panel.border = element_blank(),
        legend.position = "top") +
    scale_y_continuous(expand = c(0,1))

p_season_df = df %>%
    group_by(EventMY, SeasonModelled) %>%
    arrange(EventMY) %>%
    summarize(n = n())

levels(p_season_df$EventMY) = gsub("-", "\n", levels(p_season_df$EventMY))

event_plot = event_plot + geom_text(data = p_season_df,
                       aes(label = SeasonModelled,
                           x = EventMY, y = n,
                           fill = NULL),
                       nudge_y = 2,
                       size = 3)


cowplot::save_plot("results/figs/event-plot.pdf", event_plot, base_height = 6)
cowplot::save_plot("results/figs/event-plot.png", event_plot, base_height = 6)

event_plot



Figure subcomponent 2: bat species tested

bat_plot = df %>%
    select(ScientificName, is_positive) %>%
    group_by(ScientificName, is_positive) %>%
    summarize(n = n()) %>%
    arrange(desc(n)) %>%
    as.data.frame %>%
    mutate(ScientificName = factor(ScientificName,
                                   levels = c(
                                                "Neoromicia cf. somalica",
                                                "Mops midas",
                                                "Chaerephon pumilus",
                                                "Rhinopoma hardwickii"
                                            ))) %>%
    ggplot( aes(x = ScientificName, y= -n,
                fill = as.factor(-is_positive), width = 0.75)) +
    geom_bar(stat="identity") +
    scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "") +
    scale_x_discrete(name = "", position = "top") +
    scale_y_continuous(name = "No. of bats",
                       breaks = seq(0, -300, by = -50),  # y axis values (before coord_flip) 
                       labels = seq(0,  300, by =  50)) +
    coord_flip() +
    theme_ipsum(base_size = 12, plot_title_size = 12) +
    theme(
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="none") +
    xlab("") +
    ylab("Number of bats") +
    labs(title = "Species tested")

cowplot::save_plot("results/figs/bat-plot.pdf", bat_plot, base_height = 3)

bat_plot



Figure subcomponent 3: Pie charts for species-event timeline

sp_df = df %>%
    select(ScientificName, is_positive, EventDate) %>%
    group_by(ScientificName, is_positive, EventDate) %>%
    summarize(n = n()) %>%
    arrange(desc(n)) %>%
    mutate(sp_e = paste0(EventDate, "_", ScientificName)) %>%
    as.data.frame

sp_l = split(sp_df, sp_df$sp_e)

# ref:
# http://www.sthda.com/english/wiki/ggplot2-pie-chart-quick-start-guide-r-software-and-data-visualization

blank_theme = theme_minimal()+
    theme(
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank(),
        panel.border = element_blank(),
        panel.grid=element_blank(),
        axis.ticks = element_blank(),
        plot.title=element_text(size=14, face="bold"),
        legend.position = "none"
    )


out_dir = "results/figs/event_pie"
if( !dir.exists(out_dir)) dir.create(out_dir)

invisible(lapply(sp_l, function(sp, out_dir) {
    
    if(nrow(sp) > 1) {
        sp_p = sp %>%
            ggplot(aes(x = "", y = n, fill = as.factor(-is_positive))) +
            geom_bar(width = 1, stat="identity") +
            scale_fill_manual(values = c("#BD4F2C", "#2C42BD"), name = "") +
            coord_polar("y", start=0) +
            theme_minimal() +
            blank_theme

        
    } else {
        sp_p = sp %>%
            ggplot(aes(x = "", y = n, fill = "#B9B7B6")) +
            geom_bar(width = 1, stat="identity") +
            scale_fill_manual(values = "#2C42BD", name = "") +
            coord_polar("y", start=0) +
            theme_minimal() +
            blank_theme

    }

    out_name_pdf = file.path(out_dir, paste0(gsub(" ", "_", unique(sp$sp_e)), ".pdf"))
        out_name_png = file.path(out_dir, paste0(gsub(" ", "_", unique(sp$sp_e)), ".png"))
        cowplot::save_plot(out_name_pdf, sp_p, base_height = .5)
        cowplot::save_plot(out_name_png, sp_p, base_height = .5)
        
}, out_dir))
out_dir = file.path(here(), "results/figs/event_pie")
myimages = list.files(out_dir, pattern = ".png", full.names = TRUE)
include_graphics(myimages)

Final Figure 2 compiled with above subcomponents in Affinity Designer 1.8.3

out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-2*png"),
                      full.names = TRUE)
include_graphics(myimages)



Figure 3. Viral findings by bat species and sampling interface



Figure subcomponent 1: bar chart

df_p =

    df %>%
    mutate(SeasonModelled = factor(SeasonModelled),
           SeasonModelled = fct_rev(SeasonModelled),
           ScientificName = ifelse(ScientificName %in% "Rhinopoma hardwickii",
                                   paste0(ScientificName, " (Cave)"),
                                   paste0(ScientificName, " (Building)")))
    

virus_plot =

    df_p %>%
    select(pos_virus, pos_virus_n, pos_virus_family,
           ScientificName, SeasonModelled) %>%
    drop_na() %>%
    mutate(pos_virus = gsub("; ", "\nand ", pos_virus)) %>%
    group_by(pos_virus, pos_virus_n, pos_virus_family,
             ScientificName, SeasonModelled) %>%
    summarize(n = n()) %>%
    arrange(pos_virus_family, desc(n)) %>%
    as.data.frame %>%
    mutate(pos_virus = factor(pos_virus, unique(pos_virus)),
           pos_virus = fct_rev(pos_virus)) %>%
    ggplot(aes(x = reorder(pos_virus, n), y=n, fill = ScientificName, label = n) ) +
    facet_wrap( ~ SeasonModelled ) +
    scale_fill_manual(values = c("#CC6677", "#44AA99", "#DDCC77", "#AA4499"),
                      name = "Bat species and\nsampling interface") +
    geom_bar(stat="identity") +
    ylim(0, 80) +
    # geom_text( size = 3, nudge_y = 4) +
    coord_flip() +
    theme_ipsum(base_size = 10) +
    theme(
        axis.text.y = element_text(size = 8),
        panel.grid.minor.y = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position="right",
        legend.text = element_text(face = "italic")
    ) +
    xlab("") +
    ylab("Number of bats sampled") +
    labs(title = "Viruses detected in bats")


cowplot::save_plot("results/figs/virus-plot.pdf",
                   virus_plot, base_height = 6)

virus_plot



Figure subcomponent 2: Extra legend information

df %>%
group_by(ScientificName, SeasonModelled, is_positive) %>%
    tally() %>%
    mutate(is_positive = ifelse(is_positive, "Positive", "Negative")) %>%
    pivot_wider(names_from = is_positive, values_from = n) %>%
    adorn_totals("col") %>%
    mutate_all(~replace_na(., 0)) %>%
    mutate(pos_percent = round((Positive/Total * 100), 2)) %>%
    mutate(info = paste0(pos_percent, "% (", Positive, "/", Total, ")")) %>%
    select(ScientificName, SeasonModelled, info) %>%
    pivot_wider(names_from = SeasonModelled, values_from = info) %>%
    datatable(options = list(dom = 't'))



Final Figure 3 compiled with above subcomponents in Affinity Designer 1.8.3

out_file = file.path(here(), "manuscript_figures")
myimages = list.files(out_file, pattern = glob2rx("Figure-3*png"),
                      full.names = TRUE)
include_graphics(myimages)



Viruses detected

tbl = xtable(table(unlist(strsplit(df$pos_virus[ df$is_positive ], "; "))))
names(tbl) = "No. of bats testing positive for virus"
datatable(tbl)



Coinfections

df %>%
    filter( is_positive & pos_virus_n > 1 ) %>%
    select(SiteName, ScientificName, pos_virus) %>%
    group_by(SiteName, ScientificName, pos_virus) %>%
    summarize(n_bats = n()) %>%
    datatable(colnames = c("SiteName", "ScientificName", "Virus coinfection",
                           "No. of bats"))



Virus detection by specimen type

Breakdown of positive specimen types (at individual bat level)

Rectal swabs were the most common specimen type to yield a positive virus sequence (92/99 bats), including 27 bats from whom sequences were confirmed in both rectal and oral swabs. In addition, seven bats had viruses detected only via their oral swabs.

df %>%
    filter(is_positive) %>%
    group_by(pos_specimen_type) %>%
    tally() %>%
    adorn_totals("row") %>%
    datatable(options = list(dom = 't'))



Positive specimen types for bats with > 1 virus detected

ids = df$AnimalID[ which(df$pos_virus_n > 1) ]
multi_bats = unique(
                 tests[ tests$AnimalID %in% ids & !is.na(tests$VirusGroup),
                       c("AnimalID", "SpecimenType", "VirusGroup")]
             )


tbls = vector("list", 3)
for(i in seq_along(ids)) {
    tbls[[ i ]] = 
        multi_bats %>%
        filter(AnimalID %in% ids[ i ]) %>%
        datatable(options = list(dom = 't'))
}

tbls[[1]]
tbls[[2]]
tbls[[3]]



Virus group by specimen type (at individual bat level)

df %>%
    filter(is_positive) %>%
    select(pos_virus, pos_specimen_type) %>%
    group_by(pos_virus, pos_specimen_type) %>%
    tally() %>%
    as_data_frame() %>%
    spread(pos_specimen_type, n) %>%
    adorn_totals(c("row", "col")) %>%
    datatable(options = list(dom = 't'))



Table 2. Viral prevalence in bats by sex, age class, season, species, and specimen type

Fisher's exact tests

# Sex, age class, season, species, and interface
# ------------------------------------------------------------------------------
dependent = "is_positive"
explanatory = c("Sex",
                "AgeClass",
                "SeasonModelled",
                "ScientificName",
                "Interface")

t1 = df %>%
    summary_factorlist(dependent, explanatory, 
                       p = TRUE,
                       p_cat = "fisher") # Fisher's Exact test 
                       

# Specimen type
# ------------------------------------------------------------------------------
df_spm = df[ , c("AnimalID", "is_positive", "pos_specimen_type")]
df_spm = rbind(df_spm, df_spm)
df_spm$specimen = rep(c("oral swab", "rectal swab"), each = nrow(df))

df_spm$specimen_pos = mapply(grepl, df_spm$specimen, df_spm$pos_specimen_type)

t2 = df_spm %>%
    summary_factorlist("specimen_pos", "specimen", 
                       p = TRUE,
                       p_cat = "fisher") # Fisher's Exact test 


# Output
# ------------------------------------------------------------------------------
ft = rbind(t1, t2)
names(ft) = c("Group", "Variable", "FALSE", "TRUE", "p")

ft$pos = as.numeric(gsub("\\s*\\([^\\)]+\\)", "", ft$`TRUE`))
ft$neg = as.numeric(gsub("\\s*\\([^\\)]+\\)", "", ft$`FALSE`))
ft$pos_percent = round((ft$pos/(ft$pos + ft$neg) * 100), 1)

ft_out = ft %>%
    mutate(pos_percent = paste0(pos_percent, "% (", pos, "/", pos + neg, ")")) %>%
    select(Group, Variable, pos_percent, p)

knitr::kable(ft_out, align = c("l", "l", "r", "r"), caption = "Table 2")
Table 2
Group Variable pos_percent p
Sex female 16.1% (66/409) 0.550
male 18.3% (33/180)
AgeClass adult 15.4% (82/533) 0.008
subadult 30.4% (17/56)
SeasonModelled Dry 5.1% (6/117) <0.001
Wet 19.7% (93/472)
ScientificName Chaerephon pumilus 6.1% (14/228) <0.001
Mops midas 6.7% (12/180)
Neoromicia cf. somalica 14.3% (1/7)
Rhinopoma hardwickii 41.4% (72/174)
Interface Building 6.5% (27/415) <0.001
Cave 41.4% (72/174)
specimen oral swab 5.8% (34/589) <0.001
rectal swab 15.6% (92/589)



Exploring age/sex/season by species

Rhinopoma

Crosstable

# Crosstable 
dependent = "is_positive"
explanatory = c("Sex",
                "AgeClass",
                "SeasonModelled")

df %>%
    filter(ScientificName %in% "Rhinopoma hardwickii") %>%
    summary_factorlist(dependent, explanatory, 
                       p=TRUE,
                       add_dependent_label=TRUE,
                       p_cat = "fisher" # Fisher's Exact test 
                       ) %>%
    knitr::kable(align=c("l", "l", "r", "r", "r"))
Dependent: is_positive FALSE TRUE p
Sex female 77 (75.5) 49 (68.1) 0.305
male 25 (24.5) 23 (31.9)
AgeClass adult 83 (81.4) 59 (81.9) 1.000
subadult 19 (18.6) 13 (18.1)
SeasonModelled Dry 10 (9.8) 0.006
Wet 92 (90.2) 72 (100.0)



Chaerephon

Crosstable

# Crosstable 
dependent = "is_positive"
explanatory = c(
                  "Sex",
                  "AgeClass",
                  "SeasonModelled")

df %>%
    filter(ScientificName %in% "Chaerephon pumilus") %>%
    summary_factorlist(dependent, explanatory, 
                       p=TRUE,
                       add_dependent_label=TRUE,
                       p_cat = "fisher" # Fisher's Exact test 
                       ) %>%
    knitr::kable(align=c("l", "l", "r", "r", "r"))
Dependent: is_positive FALSE TRUE p
Sex female 133 (62.1) 8 (57.1) 0.779
male 81 (37.9) 6 (42.9)
AgeClass adult 194 (90.7) 10 (71.4) 0.046
subadult 20 (9.3) 4 (28.6)
SeasonModelled Dry 36 (16.8) 2 (14.3) 1.000
Wet 178 (83.2) 12 (85.7)



Mops

Crosstable

# Crosstable 
dependent = "is_positive"
explanatory = c(
                  "Sex",
                  # "AgeClass", # there are no subadult for Mops
                  "SeasonModelled")

df %>%
    filter(ScientificName %in% "Mops midas") %>%
    summary_factorlist(dependent, explanatory, 
                       p=TRUE,
                       add_dependent_label=TRUE,
                       p_cat = "fisher" # Fisher's Exact test 
                       ) %>%
    knitr::kable(align=c("l", "l", "r", "r", "r"))
Dependent: is_positive FALSE TRUE p
Sex female 128 (76.2) 8 (66.7) 0.491
male 40 (23.8) 4 (33.3)
SeasonModelled Dry 59 (35.1) 3 (25.0) 0.549
Wet 109 (64.9) 9 (75.0)



Neoromicia cf. somalica

Crosstable

# Crosstable 
dependent = "is_positive"
explanatory = c(
                  "Sex"
                  # "AgeClass"
                  # "SeasonModelled"
              )

df %>%
    filter(ScientificName %in% "Neoromicia cf. somalica") %>%
    summary_factorlist(dependent, explanatory, 
                       p=TRUE,
                       add_dependent_label=TRUE,
                       p_cat = "fisher" # Fisher's Exact test 
                       ) %>%
    knitr::kable(align=c("l", "l", "r", "r", "r"))
Dependent: is_positive FALSE TRUE p
Sex female 5 (83.3) 1 (100.0) 1.000
male 1 (16.7)




Multivariable logistic regression analyses

For Rhinopoma hardwickii, Chaerephon pumilus, and Mops midas

df_filter = df %>%
    filter(! ScientificName %in% c("Neoromicia cf. somalica", "Mops midas"))

m1 = glm(is_positive ~ Sex + AgeClass + SeasonModelled + ScientificName,
           data = df_filter, family = binomial)

# summary(m1)
print_md(model_parameters(m1, exponentiate = TRUE))
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.02 0.01 (2.30e-03, 0.06) -5.37 < .001
Sex (male) 1.31 0.38 (0.73, 2.33) 0.92 0.359
AgeClass (subadult) 1.23 0.44 (0.60, 2.45) 0.58 0.562
SeasonModelled (Wet) 4.33 3.30 (1.20, 27.83) 1.92 0.055
ScientificName (Rhinopoma hardwickii) 10.22 3.30 (5.59, 19.93) 7.21 < .001
df_filter = df %>%
    filter(! ScientificName %in% c("Neoromicia cf. somalica"))


m3 = glm(is_positive ~ Sex + SeasonModelled + ScientificName,
           data = df_filter, family = binomial)

# summary(m3)
print_md(model_parameters(m3, exponentiate = TRUE))
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.02 0.01 (7.21e-03, 0.06) -6.85 < .001
Sex (male) 1.35 0.36 (0.80, 2.27) 1.13 0.260
SeasonModelled (Wet) 2.73 1.37 (1.11, 8.23) 2.01 0.044
ScientificName (Mops midas) 1.31 0.54 (0.57, 2.95) 0.66 0.512
ScientificName (Rhinopoma hardwickii) 10.53 3.38 (5.78, 20.49) 7.34 < .001
glm(is_positive ~ Sex + AgeClass + SeasonModelled + ScientificName,
    data = df_filter, family = binomial) %>%
    model_parameters(exponentiate = TRUE) %>%
    print_md()
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.02 0.01 (7.15e-03, 0.06) -6.86 < .001
Sex (male) 1.35 0.36 (0.80, 2.28) 1.13 0.259
AgeClass (subadult) 1.26 0.45 (0.62, 2.50) 0.64 0.522
SeasonModelled (Wet) 2.67 1.34 (1.09, 8.06) 1.96 0.050
ScientificName (Mops midas) 1.35 0.56 (0.59, 3.05) 0.71 0.475
ScientificName (Rhinopoma hardwickii) 10.39 3.34 (5.69, 20.23) 7.28 < .001
dd =
    df %>%
    group_by(EventMY, EventDate, SeasonModelled, ConcurrentSamplingSite) %>%
    tally() %>%
    group_by(EventMY) %>%
    mutate(id = 1:n()) %>%
    arrange(EventDate)

df_filter = left_join(df_filter, dd, by = c("EventDate", "EventMY"))